	/**************************************************************************
	* Step 1: Read the raw file
	***************************************************************************/

	data C5598    ;
		infile "/.../F5500/FOIAed/1998/C5598.txt" lrecl=3105; 
		input 
		/*redacted*/
	run;

	data F5598    ;
		infile "/.../F5500/FOIAed/1998/F5598.txt" lrecl=4142; 
		input 
		/*redacted*/
	run;

	data R5598    ;
		infile "/.../F5500/FOIAed/1998/R5598.txt" lrecl=1935; 
		input 
		/*redacted*/
	run;

	/*Stack each file*/
	data raw98;
	  set c5598 f5598 r5598;
	run;

	/**************************************************************************
	* Step 2: clean the raw file and make variable names consistent
	***************************************************************************/

	data raw98
		(keep = DB
			DC
			cash_bal
			xDB
			plan_eff_year
			plan_qtr_strt
			plan_year_strt
			plan_qtr_end
			plan_year_end
			form_year
			filing_id
			filing_id_char
			adj_active
			collective_bargain_ind
			opr_ein
			opr_pn
			partcp_account_bal_cnt_n
			pension_benefit_plan_ind
			tot_active_partcp_cnt_n
			type_pension_bnft_code
			type_plan_entity_ind
			tot_income_amt_n
			tot_contrib_amt_n
			emplr_contrib_income_amt_n
			employee_contrib_income_amt_n
			t);												
		set raw98 (rename = (COLLECTIVE_BARGN_IND=collective_bargain_ind
			  PENSION_FEATURES_CD = type_pension_bnft_code));
		if  TYPE_PENSION_BNFT_PLAN_IND in /*redacted*/ then DB=/*redacted*/;					/*has DB characteristics*/
			else DB=/*redacted*/;
		if TYPE_PENSION_BNFT_PLAN_IND=/*redacted*/ then cash_bal=/*redacted*/;
			else cash_bal=/*redacted*/;
		if DB=1 & cash_bal=/*redacted*/ then xDB=/*redacted*/;
			else xDB=/*redacted*/;
		if  TYPE_PENSION_BNFT_PLAN_IND in /*redacted*/ then DC=/*redacted*/;	/*has DC characteristics*/
			else DC=/*redacted*/;
		plan_name = upcase(plan_name);
		if /*redacted*/ or 
		/*redacted*/ then supplemental=/*redacted*/;	/*is supplemental*/
			else supplemental=/*redacted*/;
		if supplemental=/*redacted*/ then delete;	/*drop supplemental plans*/

		tot_active_partcp_cnt_n=/*redacted*/;
		if tot_active_partcp_cnt_n=. then tot_active_partcp_cnt_n = /*redacted*/;
		partcp_account_bal_cnt_n=/*redacted*/;
	
		form_plan_year_begin_date = left(trim(form_plan_year_begin_date));
		fpy_begin_yr = /*redacted*/; 	/*plan start year*/
		fpy_begin_mth = /*redacted*/; 	/*plan start month*/
		fpy_begin_day = /*redacted*/;	/*plan start day*/
		fpy_date = mdy(fpy_begin_mth,fpy_begin_day,fpy_begin_yr);		/*plan start sas date*/
		plan_qtr_strt = qtr(fpy_date);						/*plan calendar start qtr*/
		plan_year_strt = year(fpy_date);					/*plan calendar start year*/

		form_tax_prd = left(trim(FORM_PLAN_YEAR_END_DATE)); 
		ftp_yr = /*redacted*/; 				/*plan end year*/
		ftp_mth = /*redacted*/; 				/*plan end month*/
		ftp_day = /*redacted*/;								/*plan end day*/
		ftp_date = mdy(ftp_mth,ftp_day,ftp_yr);					/*plan end sas date*/
		plan_qtr_end = qtr(ftp_date);						/*plan calendar end qtr*/
		plan_year_end = year(ftp_date);						/*plan calendar end year*/

		form_year = /*redacted*/;						/*rename for consistency with research file*/
		filing_id_char = dln;						/*string filing id=DLN*/
		filing_id = /*redacted*/;				/*numeric filing id*/
		
		adj_active = tot_active_partcp_cnt_n;				/*no editing for the raw file*/
		opr_ein = left(trim(spons_dfe_ein));				/*rename for consistency with research file*/
		opr_pn = left(trim(spons_dfe_pn));				/*rename for consistency with research file*/
		len_pn = length(opr_pn);					/*length of pn field*/
		if len_pn=/*redacted*/ then opr_pn = /*redacted*/;
		else if len_pn=/*redacted*/ then opr_pn = /*redacted*/;
		/*Redacted: removing invalid EIN/PNs*/

		plan_eff_year = /*redacted*/;		/*year that the plan started*/
	
		type_plan_entity_ind = left(trim(type_plan_entity_ind));
		if type_plan_entity_ind^=/*redacted*/ then delete;			/*keep only single employer plans*/
	
		if DB=/*redacted*/ and DC=/*redacted*/ then delete; 					/*remove welfare plans*/
		pension_benefit_plan_ind = /*redacted*/;					/*all plans are now pension plans*/
		t = /*redacted*/;								/*helper used for de-duplication step*/
	run;

	/*Set variable lengths to enable error-free appending with the research file*/
	data raw98;
	
		/*redacted*/
		
		set raw98;
	run;

	/**************************************************************************
	* Step 3: De-duplicate the file 
	***************************************************************************/
	
	proc sort data = raw98;
		by opr_ein opr_pn plan_year_end;
	run;
	
	proc means noprint data = raw98;
		by opr_ein opr_pn plan_year_end;
		var filing_id;
		output out = best_filing 
		max(filing_id) = max_filing_id;
	run;
	
	proc sql;
		create table clean98 as
		select a.*, b.max_filing_id
		from raw98 as a inner join
		best_filing as b
		on a.filing_id = b.max_filing_id;
	quit;

	data  clean98;
		set clean98 (drop= t max_filing_id);
		source_id = /*redacted*/; /*id for raw file*/
	run;

	/*Remove exact filing_id dups*/
	proc sort data = clean98 nodupkey;
	      by opr_ein opr_pn plan_year_end filing_id_char;
	run;

	/**************************************************************************
	* Step 4: Count up # DB plans per EIN
	***************************************************************************/
	
	proc sort data = clean98;
		by opr_ein;
	run;
	
	proc means noprint data = clean98;
		by opr_ein;
		var DB;				
		output out = plan_count98
			sum(DB) = num_DB_plans;
	run;
	
	/*Check how many participants are in firms with 2+ DB plans*/
	proc sql;
		create table pens98_plncnt as 
		select *
		from clean98 as t,
		plan_count98 as m
		where t.opr_ein = m.opr_ein;
	quit;
		
	data blue.pensplan_samp98 (rename = 
	     (tot_active_partcp_cnt_n=tot_active_partcp_cnt
	      partcp_account_bal_cnt_n=partcp_account_bal_cnt));
		set pens98_plncnt
		(drop = _type_ _freq_ xDB);
		adj_DB_partcp = DB*adj_active;
		DB_partcp = DB*tot_active_partcp_cnt_n;
	run;

	/*De-dup small number of cases where filing errors create EIN-PN_Plan year end dups
	  i.e. the filing_id's are different but the plan years are mis-aligned*/
	proc sort data = blue.pensplan_samp98 nodupkey;
	  by opr_ein opr_pn plan_year_end;
	run;
	
	/*Clean up the working directory*/
	proc datasets lib=work nolist kill;
	quit;
	run;